#install.packages("rlang")
#library(rlang)
library(tidyverse)
library(haven)
library(formatR)
library(lubridate)
library(smooth)
library(forecast)
library(scales)
library(ggplot2)
library(readxl)
library(tidyverse)
library(data.table)
library(quantmod)
library(geofacet)
library(janitor)
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE
)
Combine past years: All revenue files are in a revenue
folder that I reference when I set the working directory. When adding
new fiscal years, put the the newest year of data for revenue and
expenditures in their respective folders.
Pre-FY2022
The code below chunk takes the .dta files for all fiscal years before FY 2022 and binds them together. Variable names were manually changed by past researchers so that they were consistent across years.
Additional variables are created: object, category, sequence, type, trans_agency, trans_type
trans_agency and trans_type are only for transfers. You can search for “transfers” under the variable “org_name”
setwd("C:/Users/aleaw/OneDrive/Desktop/PhD Fall 2021 - Spring 2022/Merriman RA/Fiscal Futures FY2022/FY2021 replication/revenue")
# does all of stata code lines 1-514 of combining yearly data
allrevfiles = list.files(path = "C:/Users/aleaw/OneDrive/Desktop/PhD Fall 2021 - Spring 2022/Merriman RA/Fiscal Futures FY2022/FY2021 replication/revenue", pattern = ".dta") %>% lapply(read_dta) %>% bind_rows
#Fy21: 62295 observations, 13 variables
#FY22: 65094 obs, 13 vars
#write_csv(allrevfiles, "allrevfiles.csv")
Reads in dta file and leaves fund as a character. No longer have to worry about preserving leading zeros in categories like the fund numbers. State code used to force fund, source, and from_fund to be 4 digits long and preserve leading zeros and fund was 3 digits long with leading zeros.
setwd("C:/Users/aleaw/OneDrive/Desktop/PhD Fall 2021 - Spring 2022/Merriman RA/Fiscal Futures FY2022/FY2021 replication/expenditures")
allexpfiles = list.files(path = "C:/Users/aleaw/OneDrive/Desktop/PhD Fall 2021 - Spring 2022/Merriman RA/Fiscal Futures FY2022/FY2021 replication/expenditures", pattern = ".dta") %>% lapply(read_dta) %>% bind_rows
#fy21 213372 observations, 20 variables
# fy22 225587 obs, 21 vars.
#write_csv(allexpfiles, "allexpfiles.csv")
Code below reads in the csv files created in chunks above (allrevfiles.csv and allrexpfiles.csv). These files contain all years of data combined into one file BEFORE any recoding is done. Do not use this file for summing categories because it is just an in between step before recoding revenue and expenditure categories.
# combined in past chunks called create-rev-csv and create-exp-csv
allrevfiles <- read_csv("allrevfiles.csv") #combined but not recoded
allexpfiles <- read_csv("allexpfiles.csv") #combined but not recoded
Normally, when your receive the new fiscal year files from the Comptrollers office, you will need to change the variable names so that they are consistent with past years. This is an example of reading in the new file and changing the variable names.
For FY 2022 and after, .dta files can be avoided entirely and .csv files and R code will be used.All files before this year had been saved and passed on as .dta files for Stata code before the transition to R in Fall 2022
Example code below: Read in excel file and rename columns so that it plays well with the other years’ files.
read_xlsx("Fis_Fut_Rev_2022.xlsx") %>%
rename(fy = 'FISCAL YEAR',
fund = 'FUND #',
fund_name = 'FUND NAME',
agency = 'AGENCY #',
agency_name = 'AGENCY NAME',
source = 'REVENUE SOURCE #',
source_name = 'REV SRC NAME',
receipts = 'REVENUE YTD AMOUNT'
) %>%
# do these come from funds_ab_whatever file?
mutate(fund_cat = FIND_COLUMN, #create fund_cat column
fund_cat_name = FIND_NAME) # create fund_cat_name column
Identify new and reused funds for newest fiscal year. Recode funds to take into account different fund numbers/names over the years. Update fund_ab_in_2021.xlsx with any changes from previous fiscal year.
Clarify and add steps for identifying new and reused funds.
For funds that were reused once, a 9 replaces the 0 as the first
digit. If reused twice, then the first two values are 10.
- Ex. 0350 –> 9350 because its use changed.
- Ex. 0367 becomes 10367 because its use has changed twice now. There
was fund 0367 originally, then its use changed and it was recoded as
9367, and now it changed again so it is a 10367.
# if first character is a 0, replace with a 9
rev_1998_2022 <- allrevfiles %>%
mutate(fund = ifelse(fy < 2002 & fund %in% c("0730", "0241", "0350", "0367", "0381", "0382", "0526", "0603", "0734", "0913", "0379"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2008 & fund %in% c("0027", "0033", "0037", "0058", "0062", "0066", "0075", "0083", "0116", "0119", "0120", "0122", "0148", "0149", "0157", "0158", "0166", "0194", "0201", "0209", "0211", "0217", "0223", "0231", "0234", "0253", "0320", "0503", "0505", "0512", "0516", "0531", "0532", "0533", "0547", "0563", "0579", "0591", "0606", "0616", "0624", "0659", "0662", "0665", "0676", "0710",
"0068", "0076", "0115", "0119", "0168", "0182", "0199", "0241", "0307", "0506", "0509", "0513"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2016 & fund %in% c("0263", "0399", "0409"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2017 & fund == "0364", str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2018 & fund %in% c("0818", "0767", "0671", "0593", "0578"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy>1999 & fy < 2018 & fund == "0231", "10231", fund) ) %>%
mutate(fund = ifelse(fy < 2019 & fund %in% c("0161", "0489", "0500", "0612", "0893", "0766"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2020 & fund %in% c("0254", "0304", "0324", "0610", "0887", "0908", "0939", "0968"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2021 & fund %in% c("0255", "0325", "0348", "0967", "0972"), str_replace(fund, "0","9"), fund))
# if first character is a 0, replace with a 9
exp_1998_2022 <- allexpfiles %>%
mutate(fund = ifelse(fy < 2002 & fund %in% c("0730", "0241", "0350", "0367", "0381", "0382", "0526", "0603", "0734", "0913", "0379"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2008 & fund %in% c("0027", "0033", "0037", "0058", "0062", "0066", "0075", "0083", "0116", "0119", "0120", "0122", "0148", "0149", "0157", "0158", "0166", "0194", "0201", "0209", "0211", "0217", "0223", "0231", "0234", "0253", "0320", "0503", "0505", "0512", "0516", "0531", "0532", "0533", "0547", "0563", "0579", "0591", "0606", "0616", "0624", "0659", "0662", "0665", "0676", "0710",
"0068", "0076", "0115", "0119", "0168", "0182", "0199", "0241", "0307", "0506", "0509", "0513"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2016 & fund %in% c("0263", "0399", "0409"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2017 & fund == "0364", str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2018 & fund %in% c("0818", "0767", "0671", "0593", "0578"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy>1999 & fy < 2018 & fund == "0231", "10231", fund) ) %>%
mutate(fund = ifelse(fy < 2019 & fund %in% c("0161", "0489", "0500", "0612", "0893", "0766"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2020 & fund %in% c("0254", "0304", "0324", "0610", "0887", "0908", "0939", "0968"), str_replace(fund, "0","9"), fund)) %>%
mutate(fund = ifelse(fy < 2021 & fund %in% c("0255", "0325", "0348", "0967", "0972"), str_replace(fund, "0","9"), fund))
Note:
exp:1998_2022and thefunds_ab_in_2021dataframes have a fund_cat_name variable (AND THEY DONT MATCH 100%) which ends up creating a .x and .y version of the variable when they are joined together. Inspect this more later. It is not a huge concern because the fund number is what matters more.
funds_ab_in_2021 = read_excel("C:/Users/aleaw/OneDrive/Desktop/PhD Fall 2021 - Spring 2022/Merriman RA/Fiscal Futures FY2022/FY2021 replication/funds_ab_in_2021.xlsx")
exp_temp <- exp_1998_2022 %>%
arrange(fund, fy) %>%
filter(expenditure != 0) %>% # keeps everything that is not zero
# join funds_ab_in_2021 to exp_temp
left_join(funds_ab_in_2021, by = "fund") # matches most recent fund number and name
exp_1998_2022 and rev_1998_2022. These are
then saved as exp_temp and rev_temp while recoding variables. This is
BEFORE category groups are created and cleaned below. Only a temporary
file, do not use for analysis.# remove from computer memory to free up space (in case your computer needs it)
rm(allexpfiles)
rm(allrevfiles)
Aggregate expenditures: Save tax refunds as negative revenue. Code refunds to match the rev_type codes (02=income taxes, 03 = corporate income taxes, 06=sales tax, 09=motor fuel tax, 24=insurance taxes and fees, 35 = all other tax refunds)
## negative revenue becomes tax refunds
tax_refund_long <- exp_temp %>%
# fund != "0401" # removes State Trust Funds
filter(fund != "0401" & (object=="9910"|object=="9921"|object=="9923"|object=="9925")) %>%
# keeps these objects which represent revenue, insurance, treasurer,and financial and professional reg tax refunds
mutate(refund = case_when(
fund=="0278" & sequence == "00" ~ "02", # for income tax refund
fund=="0278" & sequence == "01" ~ "03", # tax administration and enforcement and tax operations become corporate income tax refund
fund == "0278" & sequence == "02" ~ "02",
object=="9921" ~ "21", # inheritance tax and estate tax refund appropriation
object=="9923" ~ "09", # motor fuel tax refunds
obj_seq_type == "99250055" ~ "06", # sales tax refund
fund=="0378" & object=="9925" ~ "24", # insurance privilege tax refund
fund=="0001" & object=="9925" ~ "35", #all other taxes
T ~ "CHECK")) # if none of the items above apply to the observations, then code them as 00
exp_temp <- left_join(exp_temp, tax_refund_long) %>%
mutate(refund = ifelse(is.na(refund),"not refund", as.character(refund)))
tax_refund <- tax_refund_long %>%
group_by(refund, fy)%>%
summarize(refund_amount = sum(expenditure, na.rm = TRUE)/1000000) %>%
pivot_wider(names_from = refund, values_from = refund_amount, names_prefix = "ref_") %>%
mutate_all(~replace_na(.,0)) %>%
arrange(fy)
exp_temp <- exp_temp %>% filter(refund == "not refund")
# remove the items we recoded in tax_refund_long
#exp_temp <- anti_join(exp_temp, tax_refund_long) # should be 156 fewer observations after antijoin
tax_refund will ultimately be removed from expenditure
totals and instead subtracted from revenue totals (since they were tax
refunds).
# early agencies replaced by successors
# recodes old agency numbers to consistent agency number
exp_temp <- exp_temp %>%
mutate(agency = case_when(
(agency=="438"| agency=="475" |agency == "505") ~ "440",
# financial institution & professional regulation &
# banks and real estate --> coded as financial and professional reg
agency == "473" ~ "588", # nuclear safety moved into IEMA
(agency =="531" | agency =="577") ~ "532", # coded as EPA
(agency =="556" | agency == "538") ~ "406", # coded as agriculture
agency == "560" ~ "592", # IL finance authority (fire trucks and agriculture stuff)to state fire marshal
agency == "570" & fund == "0011" ~ "494", # city of Chicago road fund to transportation
TRUE ~ (as.character(agency))))
State payments to the following pension systems:
• Teachers Retirement System (TRS)
• State Employee Retirement System (SERS)
• State University Retirement System (SURS)
• Judges Retirement System (JRS)
• General Assembly Retirement System (GARS)
Operating costs of administering the pensions are not included in this category. Fiscal Futures only includes the state’s payments into the pension funds as “pension expenditures.” Note also that these payments are subtracted from reported agency spending in calculating other categories.
obj_seq_type== “11600000” should NOT be included in pensions, correct?
why are local health insurance reserves included as pensions?
& fund != “0183” & appr_org != “55”
exp_temp <- exp_temp %>%
arrange(fund) %>%
mutate(pension = case_when(
# objects were weird for 2010 and 2011
(object=="4431" & fund=="0473" & (fy==2010 | fy==2011)) ~ 3, # teachers retirement system,
(object=="1298" & (fy==2010 | fy==2011) & (fund=="0477" | fund=="0479" | fund=="0481")) ~ 3, #judges retirement
(object=="4431" | (object>"1159" & object<"1166") ) ~ 1, # 4431 = easy to find pension items
# objects 1159 to 1166 are other objects to check later for in_ff=0
fund == "0825" ~ 4, # pension obligation acceleration bond
TRUE ~ 0))
#special accounting of pension obligation bond (POB)-funded contributions to JRS, SERS, GARS, TRS
table(exp_temp$pension)
##
## 0 1 3 4
## 154425 8814 8 8
exp_temp <- exp_temp %>%
mutate(object = ifelse((pension == 3 & in_ff == "0"), "4431", object)) %>% # why this step?
mutate(pension = ifelse(pension ==1 & in_ff == "0", 2, pension)) %>% # coded as 2 if it was supposed to be excluded. Allows or checking work in between steps.
mutate(in_ff = ifelse((pension ==2 | pension ==3 | pension == 4), "1", in_ff))
table(exp_temp$pension)
##
## 0 1 2 3 4
## 154425 8666 148 8 8
# create file with all pension items to find any mistakes
exp_temp %>%
filter(pension > 0) %>%
write_csv("all_pensions.csv")
exp_temp %>%
filter(pension > 0)
Pension = 2 represents retirement pension payments that were excluded from the fiscal futures analysis by default (in_ff=0) but should be included. Pension2_amt should be added to revenue side.
Summarizes the total expenditures for each pension code for each year. Pension contributions to be added to “Other Revenues” in later steps:
#creates long version without any aggregation
pension_2_long <- exp_temp %>%
filter(pension == 2 ) %>%
rename(year = fy)
exp_temp <- anti_join(exp_temp, pension_2_long) # 150 observations removed with antijoin
pension2_fy22<- pension_2_long %>%
group_by(year) %>%
summarize(pension_amt = sum(expenditure)/1000000)
pension2_fy22 # used in final tables
# items flagged as pensions get agency code 901 for State Pension Contributions
exp_temp <- exp_temp %>%
mutate(agency = ifelse(pension>0, "901", as.character(agency)),
agency_name = ifelse(agency == "901", "State Pension Contributions", as.character(agency_name)))
transfers_drop <- exp_temp %>% filter(
agency == "799" | # statutory transfers
object == "1993" | # interfund cash transfers
object == "1298") # purchase of investments
exp_temp <- anti_join(exp_temp, transfers_drop) # 13650 obs dropped with antijoin
Employer contributions for group insurance (contributions count as a
revenue source).
Creates the employee health costs amount to be added to the revenue
side: Employer contributions are a revenue source and should be
subtracted from state employee healthcare costs (expenditures + premiums
= net costs).
Added line of code Sept. 21 2022: eehc = ifelse(obj_seq_type == “19000000”, 1, eehc)) %>%
# identifies eehc values that would have been excluded due to in_ff == 0 before recoding
eehc_2_long <- exp_temp %>%
mutate(eehc = ifelse(object == "1180", 1, 0)) %>%
mutate(eehc = ifelse(obj_seq_type == "19000000" & fy > 2020, 1, eehc) ) %>%
mutate(eehc = ifelse((eehc == 1 & in_ff =="0"), 2, eehc)) %>% # if eehc == 1 AND in_ff was zero, then recode eehc to 2, otherwise leave eehc as it was. Mostly helps flag things that would have been excluded due to default in_ff coding
mutate(in_ff = ifelse(eehc == 2, "1", in_ff) ) %>% # recodes in_ff to 1 if eehc was coded to 2 to make sure they are included in fiscal futures.
filter(eehc == 2) # keeps only eehc == 2, items that would have been excluded based on in_ff original coding
# 146 observations
# summarizes by year totals for state employee healthcare costs == 2
eehc2_amt <- eehc_2_long %>% group_by(fy) %>%
summarize(eehc = sum(expenditure)/1000000)
# avoid double counting by recoding
exp_temp <- exp_temp %>% mutate(expenditure = ifelse(object == "1180" | (obj_seq_type == "19000000" & fy > 2020), 0, expenditure)) %>% filter(expenditure > 0)
# doesn't work but would be easier to drop eehc values from exp_temp
# exp_temp <- anti_join(exp_temp, eehc_2_long)
#, by = c("fy", "fund", "fund_name", "agency", "agency_name", "appr_org", "org_name", "obj_seq_type", "appn_net_xfer", "expenditure", "data_source", "object", "category", "sequence", "type", "trans_agency", "trans_type", "wh_approp_name"))
# should remove the 146 observations from exp_temp
# 21337 - 149 = 21188 obs (expected value after antijoin)
State Employee Health Care = Sum of expenditures for “health care coverage as elected by members per state employees group insurance act.” The payments are made from the Health Insurance Reserve Fund. We subtract the share that came from employee contributions.Employee contributions are not considered a revenue source or an expenditure in our analysis.
exp_temp <- exp_temp %>%
mutate(agency = case_when( # turns specific items into State Employee Healthcare (agency=904)
fund=="0907" & (agency=="416" & appr_org=="20") ~ "904", # central management is agency 416
fund=="0907" & (agency=="478" & appr_org=="80") ~ "904", # agency = 478: healthcare & family services
fund=="0001" & appr_org=="20" & object=="1900" & agency=="416" & (fy>2002 & fy<2006) ~ "904",
fund=="0001" & appr_org=="20" & object=="1900" & agency=="416" & (fy>2020) ~ "904",
TRUE ~ as.character(agency))) %>%
mutate(agency_name = ifelse(agency == "904", "STATE EMPLOYEE HEALTHCARE", as.character(agency_name)),
group = ifelse(agency == "904", "904", as.character(agency))) # creates group variable. Default is group = agency number
Separate transfers to local from parent agencies come from DOR(492) or Transportation (494). Treats muni revenue transfers as expenditures, not negative revenue.
The share of certain taxes levied state-wide at a common rate and then transferred to local governments. (Purely local-option taxes levied by specific local governments with the state acting as collection agent are not included.)
The five corresponding revenue items are:
• Local share of Personal Income Tax
• Local share of General Sales Tax
• Personal Property Replacement Tax on Business Income
• Personal Property Replacement Tax on Public Utilities
• Local share of Motor Fuel Tax
exp_temp <- exp_temp %>% mutate(
agency = case_when(fund=="0515" & object=="4470" & type=="08" ~ "971", # income tax
fund=="0515" & object=="4491" & type=="08" & sequence=="00" ~ "971",
fund=="0802" & object=="4491" ~ "972", #pprt transfer
fund=="0515" & object=="4491" & type=="08" & sequence=="01" ~ "976", #gst to local
fund=="0627" & object=="4472"~ "976" ,
fund=="0648" & object=="4472" ~ "976",
fund=="0515" & object=="4470" & type=="00" ~ "976",
object=="4491" & (fund=="0188"|fund=="0189") ~ "976",
fund=="0187" & object=="4470" ~ "976",
fund=="0186" & object=="4470" ~ "976",
object=="4491" & (fund=="0413"|fund=="0414"|fund=="0415") ~ "975", #mft to local
TRUE ~ as.character(agency)),
agency_name = case_when(agency == "971"~ "INCOME TAX 1/10 TO LOCAL",
agency == "972" ~ "PPRT TRANSFER TO LOCAL",
agency == "976" ~ "GST TO LOCAL",
agency == "975" ~ "MFT TO LOCAL",
TRUE~as.character(agency_name)),
group = ifelse(agency>"970" & agency < "977", as.character(agency), as.character(group)))
table(exp_temp$group)
##
## 101 102 103 105 107 108 109 110 112 115 120 131 140
## 552 1 234 154 89 190 136 128 162 127 15 350 7
## 155 156 167 201 210 275 285 290 295 310 330 340 350
## 75 114 117 1333 13 362 233 457 1100 208 202 753 3755
## 360 370 402 406 416 418 420 422 425 426 427 440 442
## 1498 770 1695 4411 3620 2369 10553 8824 954 7222 706 3170 572
## 444 445 446 448 452 458 466 478 482 492 493 494 497
## 10761 19 978 15 583 273 571 2844 5164 3745 1852 9203 2386
## 503 506 507 509 510 511 517 520 524 525 526 527 528
## 408 10 319 30 22 8775 127 3 988 28 168 39 1809
## 529 532 534 537 540 541 542 546 548 554 555 557 558
## 18 5213 5 189 64 1281 172 787 245 25 24 204 245
## 559 562 563 564 565 567 568 569 571 574 575 576 578
## 231 18 649 15 169 160 2 414 65 74 85 1 223
## 579 580 583 585 586 587 588 589 590 591 592 593 598
## 402 303 19 43 4886 678 2446 550 165 186 982 141 10
## 601 608 612 616 620 628 636 644 664 676 684 691 692
## 688 165 128 136 89 136 108 171 253 425 853 871 756
## 693 695 901 904 971 972 975 976
## 2 196 8682 49 24 24 72 1112
exp_temp <- exp_temp %>% filter(in_ff != 0) # drops in_ff = 0 funds AFTER dealing with net-revenue above
# 149309 obs to 145190 obs after filtering !=0
Principal and interest payment on both short-term and long-term debt. We do not include escrow payments.
exp_temp <- exp_temp %>%
mutate(agency = if_else(object>"7999" & object<"9000" & fund!="0455", "903", as.character(agency)),
agency_name = if_else(agency == "903", "DEBT SERVICE", as.character(agency_name)),
group = if_else(agency == "903", "903", as.character(group)))
exp_temp<- exp_temp %>%
#mutate(agency = as.numeric(agency) ) %>%
# arrange(agency)%>%
mutate(
group = case_when(
agency>"100"& agency<"200" ~ "910", # legislative
agency == "528" | (agency>"200" & agency<"300") ~ "920", # judicial
(agency>"309" & agency<"400") ~ "930", # elected officers
agency == "586" ~ "959", # create new K-12 group
agency=="402" | agency=="418" | agency=="478" | agency=="444" | agency=="482" ~ as.character(agency), # aging, CFS,HFS, human services, public health
T ~ as.character(group)),
#chip = ifelse(fund == "0001" & agency == "478" & appr_org == "65" &object=="4900" & (sequence == "20" | sequence == "54" | sequence == "61" | sequence == "62" | sequence == "65"),1 ,0)
) %>%
mutate(group = case_when(
agency=="478" & (appr_org=="01" | appr_org == "65" | appr_org=="88") & (object=="4900" | object=="4400") ~ "945", # separates CHIP from health and human services and saves it as Medicaid
agency == "586" & fund == "0355" ~ "478", # 586 (Board of Edu) has special education which is part of medicaid
#agency == "586" & appr_org == "18" ~ "945", # Spec. Edu Medicaid Matching
agency=="425" | agency=="466" | agency=="546" | agency=="569" | agency=="578" | agency=="583" | agency=="591" | agency=="592" | agency=="493" | agency=="588" ~ "941", # public safety & Corrections
agency=="420" | agency=="494" | agency=="406" | agency=="557" ~ as.character(agency), # econ devt & infra
agency=="511" | agency=="554" | agency=="574" | agency=="598" ~ "946", # Capital improvement
agency=="422" | agency=="532" ~ as.character(agency), # environment & nat. resources
agency=="440" | agency=="446" | agency=="524" | agency=="563" ~ "944", # business regulation
agency=="492" ~ "492", # revenue
agency == "416" ~ "416", # central management services
agency=="448" & fy > 2016 ~ "416", #add DoIT to central management
T ~ as.character(group))) %>%
mutate(group = case_when(
agency=="684" | agency=="691" ~ as.character(agency),
agency=="692" | agency=="695" | (agency>"599" & agency<"677") ~ "960", # higher education
agency=="427" ~ as.character(agency), # employment security
agency=="507"| agency=="442" | agency=="445" | agency=="452" |agency=="458" | agency=="497" ~ "948", # other departments
# other boards & Commissions
agency=="503" | agency=="509" | agency=="510" | agency=="565" |agency=="517" | agency=="525" | agency=="526" | agency=="529" | agency=="537" | agency=="541" | agency=="542" | agency=="548" | agency=="555" | agency=="558" | agency=="559" | agency=="562" | agency=="564" | agency=="568" | agency=="579" | agency=="580" | agency=="587" | agency=="590" | agency=="527" | agency=="585" | agency=="567" | agency=="571" | agency=="575" | agency=="540" | agency=="576" | agency=="564" | agency=="534" | agency=="520" | agency=="506" | agency == "533" ~ "949",
# non-pension expenditures of retirement funds moved to "Other Departments"
agency=="131" | agency=="275" | agency=="589" |agency=="593"|agency=="594"|agency=="693" ~ "948",
T ~ as.character(group))) %>%
mutate(group_name =
case_when(
group == "900" ~ "NOT IN FRAME",
group == "901" ~ "STATE PENSION CONTRIBUTION",
group == "903" ~ "DEBT SERVICE",
group == "910" ~ "LEGISLATIVE" ,
group == "920" ~ "JUDICIAL" ,
group == "930" ~ "ELECTED OFFICERS" ,
group == "940" ~ "OTHER HEALTH-RELATED",
group == "941" ~ "PUBLIC SAFETY" ,
group == "942" ~ "ECON DEVT & INFRASTRUCTURE" ,
group == "943" ~ "CENTRAL SERVICES",
group == "944" ~ "BUS & PROFESSION REGULATION" ,
group == "945" ~ "MEDICAID" ,
group == "946" ~ "CAPITAL IMPROVEMENT" ,
group == "948" ~ "OTHER DEPARTMENTS" ,
group == "949" ~ "OTHER BOARDS & COMMISSIONS" ,
group == "959" ~ "K-12 EDUCATION" ,
group == "960" ~ "UNIVERSITY EDUCATION" ,
group == agency ~ as.character(group),
TRUE ~ "Check name"),
year = fy)
table(exp_temp$group)
##
## 402 406 416 418 420 422 426 427 444 478 482 492 494
## 1695 4359 3304 2369 10536 8819 7220 704 10730 1489 5163 3003 9193
## 532 557 684 691 901 903 904 910 920 930 941 944 945
## 5171 204 853 848 8682 211 49 2101 4918 6635 8405 5665 842
## 946 948 949 959 960 971 972 975 976
## 8826 4219 5361 4838 2916 24 24 72 1112
# number of observations within each group category
table(exp_temp$group_name)
##
## 402 406
## 1695 4359
## 416 418
## 3289 2369
## 420 422
## 10536 8819
## 426 427
## 7220 704
## 444 478
## 10730 1485
## 482 492
## 5163 3003
## 494 532
## 9193 5171
## 557 684
## 204 853
## 691 904
## 848 49
## 971 972
## 24 24
## 975 976
## 72 1112
## BUS & PROFESSION REGULATION CAPITAL IMPROVEMENT
## 5665 8826
## Check name DEBT SERVICE
## 19 211
## ELECTED OFFICERS JUDICIAL
## 6635 4918
## K-12 EDUCATION LEGISLATIVE
## 4838 2101
## MEDICAID OTHER BOARDS & COMMISSIONS
## 842 5361
## OTHER DEPARTMENTS PUBLIC SAFETY
## 4219 8405
## STATE PENSION CONTRIBUTION UNIVERSITY EDUCATION
## 8682 2916
transfers_long <- exp_temp %>%
filter(group == "971" |group == "972" | group == "975" | group == "976")
transfers <- transfers_long %>%
group_by(year, group ) %>%
summarize(sum_expenditure = sum(expenditure)/1000000) %>%
pivot_wider(names_from = "group", values_from = "sum_expenditure", names_prefix = "exp_" )
exp_temp <- anti_join(exp_temp, transfers_long)
# write_csv(exp_temp, "all_expenditures_recoded.csv")
All expenditures recoded but not aggregated: Allows for inspection of individual expenditures within larger categories. This stage of the data is extremely useful for investigating almost all questions we have about the data.
Note that these are the raw figures BEFORE we take the additional steps:
exp_temp %>%
group_by(year, group) %>%
summarize(sum_expenditure = sum(expenditure)/1000000) %>%
arrange(year) %>%
pivot_wider(names_from = "group", values_from = "sum_expenditure")
aggregate_exp_labeled <- exp_temp %>%
group_by(year, group_name) %>%
summarize(sum_expenditure = sum(expenditure)/1000000) %>%
arrange(year) %>%
pivot_wider(names_from = "group_name", values_from = "sum_expenditure")
aggregate_exp_labeled
For aggregating revenue, use the rev_1998_2022 dataframe, join the funds_ab_in_2021 file to it, and then join the ioc_source_type file to the dataset.
You need to update the ioc_source_type file every year!
include how to do that later
# fund info to revenue for all years
rev_temp <- inner_join(rev_1998_2022, funds_ab_in_2021, by = "fund") %>% arrange(source)
# need to update the ioc_source_type file every year!
ioc_source_type <- read_dta("C:/Users/aleaw/OneDrive/Desktop/PhD Fall 2021 - Spring 2022/Merriman RA/Fiscal Futures FY2022/FY2021 replication/ioc_source_updated21.dta")
rev_temp <- left_join(rev_temp, ioc_source_type, by = "source")
# automatically used source, source name does not match for the join to work using source_name
rev_temp <- rev_temp %>%
mutate(
rev_type = ifelse(rev_type=="57" & agency=="478" & (source=="0618"|source=="2364"|source=="0660"|source=="1552"| source=="2306"| source=="2076"|source=="0676"|source=="0692"), "58", rev_type),
rev_type_name = ifelse(rev_type=="58", "FEDERAL TRANSPORTATION", rev_type_name),
rev_type = ifelse(rev_type=="57" & agency=="494", "59", rev_type),
rev_type_name = ifelse(rev_type=="59", "FEDERAL TRANSPORTATION", rev_type_name),
rev_type_name = ifelse(rev_type=="57", "FEDERAL OTHER", rev_type_name),
rev_type = ifelse(rev_type=="6", "06", rev_type),
rev_type = ifelse(rev_type=="9", "09", rev_type))
rev_temp %>%
group_by(fy, rev_type_name) %>%
summarise(receipts = sum(receipts, na.rm = TRUE)/1000000)
#collect optional premiums to fund 0907 for use in eehc expenditure
rev_temp <- rev_temp %>%
mutate(med_option_recent = ifelse(
fund=="0907" & (source=="0120"| source=="0121"| (source>"0345" & source<"0357")|(source>"2199" & source<"2209")), 1, 0),
# adds more rev_types
rev_type = case_when(
fund =="0427" ~ "12", # pub utility tax
fund == "0742" | fund == "0473" ~ "24", # insurance and fees
fund == "0976" ~ "36",# receipts from rev producing
fund == "0392" |fund == "0723" ~ "39", # licenses and fees
fund == "0656" ~ "78", #all other rev sources
TRUE ~ as.character(rev_type)))
#if not mentioned, then rev_type as it was
# optional insurance premiums
med_option_recent <- rev_temp %>%
group_by(fy, med_option_recent) %>%
summarize(med_option_amt_recent = sum(receipts)/1000000) %>%
filter(med_option_recent == 1) %>%
rename(year = fy) %>%
select(-med_option_recent)
med_option_long <- rev_temp %>% filter(med_option_recent == 1)
# 361 observations have med_option_recent == 1
med_option_long %>%
group_by(fy, med_option_recent) %>%
summarize(med_option_amt_recent = sum(receipts)/1000000) %>%
rename(year = fy) %>%
select(-med_option_recent)
rev_temp <- rev_temp %>% filter(med_option_recent != 1)
Still need to add med_option data to Other Revenues
rev_temp <- rev_temp %>%
filter(in_ff == 1) %>%
mutate(local = ifelse(is.na(local), 0, local)) %>%
filter(local != 1)
in_from_out <- c("0847", "0867", "1175", "1176", "1177", "1178", "1181", "1182", "1582", "1592", "1745", "1982", "2174", "2264")
rev_temp <- rev_temp %>%
mutate(rev_type_new = ifelse(source %in% in_from_out, "76", rev_type))
# if source contains any of the codes in in_from_out, code them as 76 (all other rev).
# revenue types to drop
drop_type <- c("32", "45", "51", "66", "72", "75", "79", "98")
# drops Blank, Student Fees, Retirement contributions, proceeds/investments,
# bond issue proceeds, interagency receipts, cook IGT, Prior year refunds.
rev_temp <- rev_temp %>% filter(!rev_type_new %in% drop_type)
# keep observations that do not have a revenue type mentioned in drop_type
table(rev_temp$rev_type_new)
##
## 02 03 06 09 12 15 18 21 24 27 30 31 33
## 144 116 803 120 556 247 43 1419 428 73 653 118 119
## 35 36 39 42 48 54 57 58 59 60 63 76 78
## 629 4978 8628 2569 30 1196 6215 590 219 102 4847 149 10451
## 99
## 756
rev_temp %>%
group_by(fy, rev_type_new) %>%
summarize(total_reciepts = sum(receipts)/1000000) %>%
pivot_wider(names_from = rev_type_new, values_from = total_reciepts, names_prefix = "rev_")
# combines smallest 4 categories to to "Other"
# they were the 4 smallest in past years, are they still the 4 smallest?
rev_temp <- rev_temp %>%
mutate(rev_type_new = ifelse(rev_type=="30" | rev_type=="60" | rev_type=="63" | rev_type=="76" | rev_type=="78" , "78", rev_type_new))
#table(rev_temp$rev_type_new) # check work
State employer contributions (eehc from eehc2_amt) should be moved to Other revenues.
State pension contributions (pension_amt from pension2_fy22)
should be added to Other revenues.
Local Government Transfers (exp_970) should be on the expenditure side.
Subtract employee insurance premiums from 904 (State Employee Healthcare Expenditures - Employee Premiums = Actual state healthcare costs. Subtract med_option_amt_recent in med_option_recent from exp_904 in ff_exp).
ff_rev <- rev_temp %>%
group_by(rev_type_new, fy) %>%
summarize(sum_receipts = sum(receipts, na.rm=TRUE)/1000000 ) %>%
pivot_wider(names_from = "rev_type_new", values_from = "sum_receipts", names_prefix = "rev_")
ff_rev<- left_join(ff_rev, tax_refund)
ff_rev <- left_join(ff_rev, pension2_fy22, by=c("fy" = "year"))
ff_rev <- left_join(ff_rev, eehc2_amt)
ff_rev <- mutate_all(ff_rev, ~replace_na(.,0))
ff_rev <- ff_rev %>%
mutate(rev_02 = rev_02 - ref_02,
rev_03 = rev_03 - ref_03,
rev_06 = rev_06 - ref_06,
rev_09 = rev_09 - ref_09,
rev_21 = rev_21 - ref_21,
rev_24 = rev_24 - ref_24,
rev_35 = rev_35 - ref_35,
rev_78new = rev_78 + pension_amt + eehc
) %>%
select(-c(ref_02:ref_35, rev_76, rev_78, rev_99, rev_NA, pension_amt, eehc))
ff_rev
Since I already pivot_wider()ed the table in the previous code chunk, I now change each column’s name by using rename() to set new variable names. Ideally the final dataframe would have both the variable name and the variable label but I have not done that yet.
aggregate_rev_labels <- ff_rev %>%
rename("INDIVIDUAL INCOME TAXES, gross of local, net of refunds" = rev_02,
"CORPORATE INCOME TAXES, gross of PPRT, net of refunds" = rev_03,
"SALES TAXES, gross of local share" = rev_06 ,
"MOTOR FUEL TAX, gross of local share, net of refunds" = rev_09 ,
"PUBLIC UTILITY TAXES, gross of PPRT" = rev_12,
"CIGARETTE TAXES" = rev_15 ,
"LIQUOR GALLONAGE TAXES" = rev_18,
"INHERITANCE TAX" = rev_21,
"INSURANCE TAXES&FEES&LICENSES, net of refunds" = rev_24 ,
"CORP FRANCHISE TAXES & FEES" = rev_27,
# "HORSE RACING TAXES & FEES" = rev_30, # in Other
"MEDICAL PROVIDER ASSESSMENTS" = rev_31 ,
# "GARNISHMENT-LEVIES " = rev_32 , # dropped
"LOTTERY RECEIPTS" = rev_33 ,
"OTHER TAXES" = rev_35,
"RECEIPTS FROM REVENUE PRODUCNG" = rev_36,
"LICENSES, FEES & REGISTRATIONS" = rev_39 ,
"MOTOR VEHICLE AND OPERATORS" = rev_42 ,
# "STUDENT FEES-UNIVERSITIES" = rev_45, # dropped
"RIVERBOAT WAGERING TAXES" = rev_48 ,
# "RETIREMENT CONTRIBUTIONS " = rev_51, # dropped
"GIFTS AND BEQUESTS" = rev_54,
"FEDERAL OTHER" = rev_57 ,
"FEDERAL MEDICAID" = rev_58,
"FEDERAL TRANSPORTATION" = rev_59 ,
# "OTHER GRANTS AND CONTRACTS" = rev_60, #other
# "INVESTMENT INCOME" = rev_63, # other
# "PROCEEDS,INVESTMENT MATURITIES" = rev_66 , #dropped
# "BOND ISSUE PROCEEDS" = rev_72, #dropped
# "INTER-AGENCY RECEIPTS" = rev_75, #dropped
# "TRANSFER IN FROM OUT FUNDS" = rev_76, #other
"ALL OTHER SOURCES" = rev_78new ,
# "COOK COUNTY IGT" = rev_79, #dropped
# "PRIOR YEAR REFUNDS" = rev_98 #dropped
)
aggregate_rev_labels
# Still contains columns that should be dropped for the clean final aggregate table. Drop the variables I don't want in the output table in the "graphs" section.
Create state employee healthcare costs that reflects the health costs minus the optional insurance premiums that came in (904_new = 904 - med_option_amt_recent).
Create exp_970 for all local government transfers (exp_971 + exp_972 + exp_975 + exp_976).
ff_exp <- exp_temp %>%
group_by(fy, group) %>%
summarize(sum_expenditures = sum(expenditure, na.rm=TRUE)/1000000 ) %>%
pivot_wider(names_from = "group", values_from = "sum_expenditures", names_prefix = "exp_")%>%
# join state employee healthcare and subtract employee premiums
left_join(med_option_recent, by = c("fy" = "year")) %>%
mutate(exp_904_new = exp_904 - med_option_amt_recent) %>% # state employee healthcare
# join local transfers and create exp_970
left_join(transfers, by = c("fy" = "year")) %>%
mutate(exp_970 = exp_971 + exp_972 + exp_975 + exp_976)
ff_exp<- ff_exp %>% select(-c(exp_904, med_option_amt_recent, exp_971:exp_976)) # drop unwanted columns
ff_exp
Create total revenues and total expenditures only:
rev_long and exp_long, expenditures
and revenues are in the same format and can be combined together for the
totals and gap each year.rev_long <- pivot_longer(ff_rev, rev_02:rev_78new, names_to = c("type","Category"), values_to = "Dollars", names_sep = "_") %>%
rename(Year = fy) %>%
mutate(Category_name = case_when(
Category == "02" ~ "INDIVIDUAL INCOME TAXES, gross of local, net of refunds" ,
Category == "03" ~ "CORPORATE INCOME TAXES, gross of PPRT, net of refunds" ,
Category == "06" ~ "SALES TAXES, gross of local share" ,
Category == "09" ~ "MOTOR FUEL TAX, gross of local share, net of refunds" ,
Category == "12" ~ "PUBLIC UTILITY TAXES, gross of PPRT" ,
Category == "15" ~ "CIGARETTE TAXES" ,
Category == "18" ~ "LIQUOR GALLONAGE TAXES" ,
Category == "21" ~ "INHERITANCE TAX" ,
Category == "24" ~ "INSURANCE TAXES&FEES&LICENSES, net of refunds " ,
Category == "27" ~ "CORP FRANCHISE TAXES & FEES" ,
Category == "30" ~ "HORSE RACING TAXES & FEES", # in Other
Category == "31" ~ "MEDICAL PROVIDER ASSESSMENTS" ,
Category == "32" ~ "GARNISHMENT-LEVIES" , # dropped
Category == "33" ~ "LOTTERY RECEIPTS" ,
Category == "35" ~ "OTHER TAXES" ,
Category == "36" ~ "RECEIPTS FROM REVENUE PRODUCNG",
Category == "39" ~ "LICENSES, FEES & REGISTRATIONS" ,
Category == "42" ~ "MOTOR VEHICLE AND OPERATORS" ,
Category == "45" ~ "STUDENT FEES-UNIVERSITIES", # dropped
Category == "48" ~ "RIVERBOAT WAGERING TAXES" ,
Category == "51" ~ "RETIREMENT CONTRIBUTIONS" , # dropped
Category == "54" ~ "GIFTS AND BEQUESTS",
Category == "57" ~ "FEDERAL OTHER" ,
Category == "58" ~ "FEDERAL MEDICAID",
Category == "59" ~ "FEDERAL TRANSPORTATION" ,
Category == "60" ~ "OTHER GRANTS AND CONTRACTS", #other
Category == "63" ~ "INVESTMENT INCOME", # other
Category == "66" ~ "PROCEEDS,INVESTMENT MATURITIES" , #dropped
Category == "72" ~ "BOND ISSUE PROCEEDS", #dropped
Category == "75" ~ "INTER-AGENCY RECEIPTS ", #dropped
Category == "76" ~ "TRANSFER IN FROM OUT FUNDS", #other
Category == "78new" ~ "ALL OTHER SOURCES" ,
Category == "79" ~ "COOK COUNTY IGT", #dropped
Category == "98" ~ "PRIOR YEAR REFUNDS", #dropped
T ~ "Check Me!"
) )
exp_long <- pivot_longer(ff_exp, exp_402:exp_970 , names_to = c("type", "Category"), values_to = "Dollars", names_sep = "_") %>%
rename(Year = fy ) %>%
mutate(Category_name =
case_when(
Category == "402" ~ "AGING" ,
Category == "406" ~ "AGRICULTURE",
Category == "416" ~ "CENTRAL MANAGEMENT",
Category == "418" ~ "CHILDREN AND FAMILY SERVICES",
Category == "420" ~ "COMMERCE AND ECONOMIC OPPORTUNITY",
Category == "422" ~ "NATURAL RESOURCES" ,
Category == "426" ~ "CORRECTIONS",
Category == "427" ~ "EMPLOYMENT SECURITY" ,
Category == "444" ~ "HUMAN SERVICES" ,
Category == "448" ~ "Innovation and Technology", # AWM added fy2022
Category == "478" ~ "HEALTHCARE & FAM SER NET OF MEDICAID",
Category == "482" ~ "PUBLIC HEALTH",
Category == "492" ~ "REVENUE",
Category == "494" ~ "TRANSPORTATION" ,
Category == "532" ~ "ENVIRONMENTAL PROTECT AGENCY" ,
Category == "557" ~ "IL STATE TOLL HIGHWAY AUTH" ,
Category == "684" ~ "IL COMMUNITY COLLEGE BOARD",
Category == "691" ~ "IL STUDENT ASSISTANCE COMM" ,
Category == "900" ~ "NOT IN FRAME",
Category == "901" ~ "STATE PENSION CONTRIBUTION",
Category == "903" ~ "DEBT SERVICE",
Category == "904" ~ "State Employee Healthcare",
Category == "910" ~ "LEGISLATIVE" ,
Category == "920" ~ "JUDICIAL" ,
Category == "930" ~ "ELECTED OFFICERS" ,
Category == "940" ~ "OTHER HEALTH-RELATED",
Category == "941" ~ "PUBLIC SAFETY" ,
Category == "942" ~ "ECON DEVT & INFRASTRUCTURE" ,
Category == "943" ~ "CENTRAL SERVICES",
Category == "944" ~ "BUS & PROFESSION REGULATION" ,
Category == "945" ~ "MEDICAID" ,
Category == "946" ~ "CAPITAL IMPROVEMENT" ,
Category == "948" ~ "OTHER DEPARTMENTS" ,
Category == "949" ~ "OTHER BOARDS & COMMISSIONS" ,
Category == "959" ~ "K-12 EDUCATION" ,
Category == "960" ~ "UNIVERSITY EDUCATION",
Category == "970" ~ "Local Govt Transfers",
T ~ "CHECK ME!")
)
# write_csv(exp_long, "expenditures_recoded_long_FY22.csv")
# write_csv(rev_long, "revenue_recoded_long_FY22.csv")
aggregated_totals_long <- rbind(rev_long, exp_long)
aggregated_totals_long
year_totals <- aggregated_totals_long %>%
group_by(type, Year) %>%
summarize(Dollars = sum(Dollars, na.rm = TRUE)) %>%
pivot_wider(names_from = "type", values_from = Dollars) %>%
rename(
Expenditures = exp,
Revenue = rev) %>%
mutate(Gap = Revenue - Expenditures)
# creates variable for the Gap each year
year_totals
# write_csv(aggregated_totals_long, "aggregated_totals.csv")
Graphs made from aggregated_totals_long dataframe.
aggregated_totals_long %>%
filter(type == "exp") %>% # uses only expenditures
ggplot(aes(x = Year, y = Dollars, group = Category)) +
geom_line()+
xlab("Year") +
ylab("Millions of Dollars") +
ggtitle("Illinois Expenditures by Category")
aggregated_totals_long %>%
filter(type == "rev") %>% #uses only revenues
ggplot(aes(x = Year, y = Dollars, group = Category, label = Category_name)) +
geom_line()+
xlab("Year") +
ylab("Millions of Dollars") +
ggtitle("Illinois Revenues by Category")
year_totals %>%
ggplot() +
# geom_smooth adds regression line, graphed first so it appears behind line graph
geom_smooth(aes(x = Year, y = Revenue), color = "light green", method = "lm", se = FALSE) +
geom_smooth(aes(x = Year, y = Expenditures), color = "gray", method = "lm", se = FALSE) +
# line graph of revenue and expenditures
geom_line(aes(x = Year, y = Revenue), color = "green4") +
geom_line(aes(x = Year, y = Expenditures), color = "black") +
# labels
theme_bw() +
scale_y_continuous(labels = comma)+
xlab("Year") +
ylab("Millions of Dollars") +
ggtitle("Illinois Expenditures and Revenue Totals, 1998-2022")
Expenditure and revenue amounts in millions of dollars, with and without labels:
exp_long %>%
filter(Year == 2021) %>%
#mutate(`Total Expenditures`= sum(Dollars, na.rm = TRUE)) %>%
# select(-c(Year, `Total Expenditures`)) %>%
arrange(desc(`Dollars`)) %>%
ggplot() +
geom_col(aes(x = fct_reorder(Category_name, `Dollars`), y = `Dollars`))+
coord_flip() +
xlab("Expenditure Categories") +
ylab("Millions of Dollars") +
theme_bw()
exp_long %>%
filter(Year == 2021) %>%
#mutate(`Total Expenditures`= sum(Dollars, na.rm = TRUE)) %>%
# select(-c(Year, `Total Expenditures`)) %>%
arrange(desc(`Dollars`)) %>%
ggplot() +
geom_col(aes(x = fct_reorder(Category, `Dollars`), y = `Dollars`))+
coord_flip() +
xlab("Expenditure Categories") +
ylab("Millions of Dollars") +
theme_bw()
rev_long %>%
filter(Year == 2021) %>%
#mutate(`Total Expenditures`= sum(Dollars, na.rm = TRUE)) %>%
# select(-c(Year, `Total Expenditures`)) %>%
arrange(desc(`Dollars`)) %>%
ggplot() +
geom_col(aes(x = fct_reorder(Category_name, `Dollars`), y = `Dollars`))+
coord_flip() +
xlab("Revenue Categories") +
ylab("Millions of Dollars") +
theme_bw()
rev_long %>%
filter(Year == 2021) %>%
#mutate(`Total Expenditures`= sum(Dollars, na.rm = TRUE)) %>%
# select(-c(Year, `Total Expenditures`)) %>%
arrange(desc(`Dollars`)) %>%
ggplot() +
geom_col(aes(x = fct_reorder(Category, `Dollars`), y = `Dollars`))+
coord_flip() +
xlab("Revenue Categories") +
ylab("Millions of Dollars") +
theme_bw()
Expenditure and revenues when focusing on largest categories and combining others into “All Other Expenditures(Revenues)”:
exp_long %>%
filter( Year == 2021) %>%
mutate(rank = rank(Dollars),
Category_name = ifelse(rank > 13, Category_name, 'All Other Expenditures')) %>%
# select(-c(Year, Dollars, rank)) %>%
arrange(desc(Dollars)) %>%
ggplot() +
geom_col(aes(x = fct_reorder(Category_name, `Dollars`), y = `Dollars`), fill = "light green")+
coord_flip() +
xlab("") +
theme_bw()
rev_long %>%
filter( Year == 2021) %>%
mutate(rank = rank(Dollars),
Category_name = ifelse(rank > 10, Category_name, 'All Other Expenditures')) %>%
# select(-c(Year, Dollars, rank)) %>%
arrange(desc(Dollars)) %>%
ggplot() +
geom_col(aes(x = fct_reorder(Category_name, `Dollars`), y = `Dollars`), fill = "light blue")+
coord_flip() +
xlab("") +
theme_bw()
Keeping the top 13 categories and grouping the rest to All Other Expenditures(Revenues). Shown as a percentage of total expenditures(revenues)
exp_long %>%
filter( Year == 2021) %>%
mutate(`Total Expenditures` = sum(Dollars, na.rm = TRUE),
`Percent of Total Expenditures` = round((Dollars / `Total Expenditures`*100), 2),
rank = rank(-Dollars),
Category = ifelse(rank <= 13, Category, 'All Other Expenditures')) %>%
select(-c(Year, `Total Expenditures`, rank)) %>%
arrange(desc(`Percent of Total Expenditures`)) %>%
ggplot() +
geom_col(aes(x = fct_reorder(Category, `Percent of Total Expenditures`), y = `Percent of Total Expenditures`), fill = "light green")+
coord_flip() +
xlab("") +
ylab("Percent of Total Expenditure") +
theme_bw()
exp_long %>%
filter( Year == 2021) %>%
mutate(`Total Expenditures` = sum(Dollars, na.rm = TRUE),
`Percent of Total Expenditures` = round((Dollars / `Total Expenditures`*100), 2),
rank = rank(-Dollars),
Category_name = ifelse(rank <= 13, Category_name, 'All Other Expendiures')) %>%
select(-c(Year, `Total Expenditures`, rank)) %>%
arrange(desc(`Percent of Total Expenditures`)) %>%
ggplot() +
geom_col(aes(x = fct_reorder(Category_name, `Percent of Total Expenditures`), y = `Percent of Total Expenditures`), fill = "light green")+
coord_flip() +
xlab("")+
ylab("Percent of Total Expenditure") +
theme_bw()
Each year, you will need to update the CAGR formulas!
calc_cagr is a function created for calculating the
CAGRs for different spans of time.
# function for calculating the CAGR
calc_cagr <- function(df, n) {
df <- exp_long %>%
select(-type) %>%
arrange(Category_name, Year) %>%
group_by(Category_name) %>%
mutate(cagr = ((`Dollars` / lag(`Dollars`, n)) ^ (1 / n)) - 1)
return(df)
}
# This works for one variable at a time
cagr_23 <- calc_cagr(exp_long, 23) %>%
# group_by(Category) %>%
summarize(cagr_23 = round(sum(cagr*100, na.rm = TRUE), 2))
cagr23_precovid <- exp_long %>%
filter(Year <= 2019) %>%
calc_cagr(21) %>%
summarize(cagr_21 = round(sum(cagr*100, na.rm = TRUE), 2))
cagr_10 <- calc_cagr(exp_long, 10) %>%
filter(Year == 2021) %>%
summarize(cagr_10 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))
cagr_5 <- calc_cagr(exp_long, 5) %>%
filter(Year == 2021) %>%
summarize(cagr_5 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))
cagr_3 <- calc_cagr(exp_long, 3) %>%
filter(Year == 2021) %>%
summarize(cagr_3 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))
cagr_2 <- calc_cagr(exp_long, 2) %>%
filter(Year == 2021) %>%
summarize(cagr_2 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))
cagr_1 <- calc_cagr(exp_long, 1) %>%
filter(Year == 2021) %>%
summarize(cagr_1 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))
CAGR_expenditures_summary <- data.frame(cagr_1, cagr_2, cagr_3, cagr_5, cagr_10, cagr_23 ) %>%
select(-c(Category_name.1, Category_name.2, Category_name.3, Category_name.4, Category_name.5 )) %>%
rename("Expenditure Category" = Category_name, "1 Year CAGR" = cagr_1, "2 Year CAGR" = cagr_2, "3 Year CAGR" = cagr_3, "5 Year CAGR" = cagr_5, "10 Year CAGR" = cagr_10,"23 Year CAGR" = cagr_23 )
CAGR_expenditures_summary
# to have it as a csv, uncomment the line below
write_csv(CAGR_expenditures_summary, "CAGR_expenditures_summary.csv")
calc_cagr <- function(df, n) {
df <- rev_long %>%
arrange(Category_name, Year) %>%
group_by(Category_name) %>%
mutate(cagr = ((Dollars / lag(Dollars, n)) ^ (1 / n)) - 1)
return(df)
}
# This works for one variable at a time
cagr_23 <- calc_cagr(rev_long, 23) %>%
# group_by(Category) %>%
summarize(cagr_23 = round(sum(cagr*100, na.rm = TRUE), 2))
cagr_10 <- calc_cagr(rev_long, 10) %>%
filter(Year == 2021) %>%
summarize(cagr_10 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))
cagr_5 <- calc_cagr(rev_long, 5) %>%
filter(Year == 2021) %>%
summarize(cagr_5 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))
cagr_3 <- calc_cagr(rev_long, 3) %>%
filter(Year == 2021) %>%
summarize(cagr_3 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))
cagr_2 <- calc_cagr(rev_long, 2) %>%
filter(Year == 2021) %>%
summarize(cagr_2 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))
cagr_1 <- calc_cagr(rev_long, 1) %>%
filter(Year == 2021) %>%
summarize(cagr_1 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))
CAGR_revenue_summary <- data.frame(cagr_1, cagr_2, cagr_3, cagr_5, cagr_10, cagr_23) %>%
select(-c(Category_name.1, Category_name.2, Category_name.3, Category_name.4, Category_name.5 )) %>%
rename("Revenue Category" = Category_name, "1 Year CAGR" = cagr_1, "2 Year CAGR" = cagr_2, "3 Year CAGR" = cagr_3, "5 Year CAGR" = cagr_5, "10 Year CAGR" = cagr_10,"23 Year CAGR" = cagr_23 )
CAGR_revenue_summary
# to have it as a csv, uncomment the line below
write_csv(CAGR_revenue_summary, "CAGR_revenue_summary.csv")
rm(cagr_1, cagr_2, cagr_3, cagr_5, cagr_10, cagr_23)
Expenditure and Revenue Growth using a lag formula:
exp_long %>%
group_by(Category_name) %>%
mutate(Growth = ((Dollars) - lag(Dollars))/lag(Dollars) *100) %>%
summarize(Growth = round(mean(Growth, na.rm = TRUE), 2))
rev_long %>%
group_by(Category_name) %>%
mutate(Growth = ((Dollars) - lag(Dollars))/lag(Dollars) *100) %>%
summarize(Growth = round(mean(Growth, na.rm = TRUE), 2))
Final column not done yet
revenue_change <- rev_long %>%
select(-c(type,Category)) %>%
filter(Year > 2019) %>%
pivot_wider(names_from = Year , values_from = Dollars, names_prefix = "Dollars_") %>%
mutate("Change from 2020 to 2021" = Dollars_2021 - Dollars_2020,
"Percent Change from 2020 to 2021" = (Dollars_2021 -Dollars_2020)/Dollars_2020) %>%
left_join(CAGR_revenue_summary, by = c("Category_name" = "Revenue Category")) %>%
select(-c(Dollars_2020,`1 Year CAGR`:`10 Year CAGR`))
revenue_change
expenditure_change <- exp_long %>%
select(-c(type,Category)) %>%
filter(Year > 2019) %>%
pivot_wider(names_from = Year , values_from = Dollars, names_prefix = "Dollars_") %>%
mutate("Change from 2020 to 2021" = Dollars_2021 - Dollars_2020,
"Percent Change from 2020 to 2021" = (Dollars_2021 -Dollars_2020)/Dollars_2020) %>%
left_join(CAGR_expenditures_summary, by = c("Category_name" = "Expenditure Category")) %>%
select(-c(Dollars_2020,`1 Year CAGR`:`10 Year CAGR`))
expenditure_change
Saves main items in one excel file named
summary_file.xlsx. Delete eval=FALSE to run on
local computer.
#install.packages("openxlsx")
library(openxlsx)
dataset_names <- list('rev_long' = rev_long, 'exp_long' = exp_long,
`Table 1` = expenditure_change, `Table 2` = revenue_change,
'Table 4.a' = CAGR_revenue_summary, 'Table 4.b' = CAGR_expenditures_summary,
'year_totals' = year_totals)
write.xlsx(dataset_names, file = 'summary_file_AWM_v2.xlsx')